const puppeteer = require('puppeteer')
const cheerio = require('cheerio')
const fs = require('fs')
const path = require('path')

// Tieba forum names. The entry at index 1 is the one used to build the
// listing URL below.
const tiebaArr = [
    '奢侈品',
    '奢侈品原单',
    '精仿',
    '原单',
    'mulberry',
    'prada',
    'loewe',
    '罗意威',
    '纪梵希',
    '原单服饰',
    'mj',
    'Lv',
    'tory',
    'burch',
    'celine',
    '宝格丽三宅一生',
];

// Regular expression for text shaped like a WeChat ID: one ASCII letter
// followed by 5 to 19 letters, digits, hyphens or underscores.
// It is global (/g), so exec()/test() calls on it keep state in `lastIndex`.
const wxreg = /[a-zA-Z]{1}[-_a-zA-Z0-9]{5,19}/g;

// Forum listing page, selected through the `kw` query parameter.
const URI = `http://tieba.baidu.com/f?kw=${tiebaArr[1]}`;

// Chrome binary and launch flags used for the crawl.
const LAUNCH_OPTIONS = {
    executablePath: "D:/Program Files (x86)/chrome-win32/chrome.exe",
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
};

// iPhone Safari user agent string set on every page before navigating.
const MOBILE_USER_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1';

// Origin prepended to the thread links found on the listing page
// (the links are treated as paths relative to this origin).
const TIEBA_ORIGIN = 'http://tieba.baidu.com';

/**
 * Opens `url` in a new tab of `browser` and returns the inner HTML of the
 * document body. The tab is closed again even when navigation fails.
 *
 * @param {object} browser - Puppeteer browser instance.
 * @param {string} url - Address to load.
 * @returns {Promise<string>} Inner HTML of the loaded page's <body>.
 */
async function fetchBodyHtml(browser, url) {
    const page = await browser.newPage();
    try {
        await page.setUserAgent(MOBILE_USER_AGENT);
        await page.goto(url);
        return await page.evaluate(() => document.body.innerHTML);
    } finally {
        await page.close();
    }
}

/**
 * Extracts the thread links from the HTML of a forum listing page.
 *
 * @param {string} listHtml - Body HTML of the listing page.
 * @returns {string[]} Absolute thread URLs with any query string removed.
 */
function extractThreadUrls(listHtml) {
    const $ = cheerio.load(listHtml);
    const threadUrls = [];
    $('.threads_list>.tl_shadow>a').each((index, anchor) => {
        const href = $(anchor).attr('href');
        // Skip anchors without a target and the `javascript:;` placeholders.
        if (!href || href === 'javascript:;') {
            return;
        }
        // Drop the query string, but keep the whole href when there is none
        // (cutting at index -1 would otherwise leave an empty path).
        const queryStart = href.indexOf('?');
        const threadPath = queryStart === -1 ? href : href.slice(0, queryStart);
        threadUrls.push(TIEBA_ORIGIN + threadPath);
    });
    return threadUrls;
}

/**
 * Scans the comments of one thread page for text matching `wxreg`.
 * Every non-empty comment is also echoed to the console.
 *
 * @param {string} threadHtml - Body HTML of the thread page.
 * @returns {string} One entry per matching comment: the first match on its
 *     own line, then the comment index and full text, then a blank line.
 *     Empty when nothing matched.
 */
function collectWechatComments(threadHtml) {
    const $ = cheerio.load(threadHtml);
    let collected = '';
    $('#pblist>li>.list_item_wrapper>.list_main>.list_item_top>.content').each((index, node) => {
        const text = $(node).text(); // full text of this comment
        if (!text) {
            return;
        }
        // wxreg is global, so exec() would resume from where the previous
        // comment's match ended; rewind it to search this text from the start.
        wxreg.lastIndex = 0;
        const match = wxreg.exec(text);
        if (match) {
            collected += `${match[0]}\n${index}${text}\n\n`;
        }
        console.log(text);
    });
    return collected;
}

/**
 * Crawls a Tieba forum listing page, visits every thread linked from it and
 * saves the comments that match `wxreg` to `<tiebaArr[1]>title.txt` next to
 * this script. No file is written when nothing matched.
 *
 * Threads are crawled one after another in a single browser. A thread that
 * fails to load is logged and skipped so the remaining threads still run.
 * The results of all threads are written in one go, so a later thread does
 * not overwrite what an earlier one found.
 *
 * @param {string} url - Address of the forum listing page.
 * @returns {Promise<void>} Settles once every thread has been processed and
 *     the browser is closed; rejects if the listing page itself cannot be
 *     crawled.
 */
async function getGood(url) {
    const browser = await puppeteer.launch(LAUNCH_OPTIONS);
    try {
        const threadUrls = extractThreadUrls(await fetchBodyHtml(browser, url));

        let txt = '';
        for (const threadUrl of threadUrls) {
            try {
                txt += collectWechatComments(await fetchBodyHtml(browser, threadUrl));
            } catch (err) {
                // Best effort: one broken thread must not abort the whole run.
                console.error(`Failed to crawl ${threadUrl}:`, err);
            }
        }

        if (txt) {
            // NOTE: the output name is tied to tiebaArr[1] rather than to `url`.
            fs.writeFileSync(path.join(__dirname, tiebaArr[1]+'title.txt'), txt);
        }
    } finally {
        // Always release the Chrome process, even when crawling failed.
        await browser.close();
    }
}

// Start the crawl; report a failure and exit non-zero instead of leaving the
// returned promise's rejection unhandled.
getGood(URI).catch((err) => {
    console.error(err);
    process.exitCode = 1;
});